# Use the main CRAN mirror for any package installation triggered below.
options(repos = c(CRAN = "https://cran.r-project.org"))
# Install 'pacman' only when it is missing. requireNamespace() tests for the
# package without attaching it (require() is meant for loading, not for
# checking); every later use is namespaced as pacman::p_load().
if (!requireNamespace("pacman", quietly = TRUE)) {
  install.packages("pacman")
}
## Loading required package: pacman
# Attach (installing when needed) every package used in this analysis.
pacman::p_load(tidyverse, here, janitor, dplyr, knitr, lubridate, reactable, gtsummary)
# NOTE: the former `rm(list = ls())` was removed on purpose. It silently wiped
# the workspace of whoever sourced this script, and it is unnecessary here
# because every object used below is created by the script itself.

FIRST STEPS

Dataset manipulation, starting from ‘bl_schisto_wide_25052024_BW’ (the long-format table ‘bl_schisto_long_25052024_BW’ is loaded further below).

# Restore the wide baseline table from its .Rdata file (the file is expected to
# provide the object `bl_schisto_wide_25052024_BW`) and browse it interactively.
load(file = "data/bl_schisto_wide_25052024_BW.Rdata")
reactable(data = bl_schisto_wide_25052024_BW)

Exclusion of HIV-HBV co-infected patients

library(dplyr)
library(haven)
library(labelled)

# Turn the labelled 'hiv_status' codes into their text labels, then keep only
# the rows labelled "HBV"; all other rows are excluded.
bl_schisto_wide_25052024_noCoInf <- filter(
  mutate(
    bl_schisto_wide_25052024_BW,
    hiv_status = as.character(as_factor(hiv_status))
  ),
  hiv_status == "HBV"
)
# The unfiltered table is not needed any more
rm(bl_schisto_wide_25052024_BW)
reactable(data = bl_schisto_wide_25052024_noCoInf)
# Sanity check: which values remain in the 'hiv_status' column?
unique_values <- bl_schisto_wide_25052024_noCoInf %>%
  pull(hiv_status) %>%
  unique()
print(unique_values)
## [1] "HBV"
# Patient count: number of distinct 'pid' values left after the exclusion
n_distinct(bl_schisto_wide_25052024_noCoInf$pid)
## [1] 799

Create ‘CCA_Fb’ dataset with HBV-monoinfected patients who have been tested for CCA (n = 721)

# Patients with a recorded CCA result: keep the CCA result/date together with
# the FibroScan medians and IQRs (stiffness and CAP).
CCA_Fb <- bl_schisto_wide_25052024_noCoInf %>%
  filter(!is.na(cca_res)) %>%
  select(pid, cca_res, cca_bl_date, fibs_med_res, fibs_iqr_res, cap_med_res, cap_iqr_res)

reactable(data = CCA_Fb)
# Look for fully duplicated rows (no columns given, so all are compared)
get_dupes(CCA_Fb)
## No variable names specified - using all columns.
## No duplicate combinations found of: pid, cca_res, cca_bl_date, fibs_med_res, fibs_iqr_res, cap_med_res, cap_iqr_res
## # A tibble: 0 × 8
## # ℹ 8 variables: pid <chr>, cca_res <fct>, cca_bl_date <date>,
## #   fibs_med_res <dbl>, fibs_iqr_res <dbl>, cap_med_res <dbl>,
## #   cap_iqr_res <dbl>, dupe_count <int>

Create ‘SENB_noCCA’ dataset with patients (both mono- & co-infected) who have never been tested for CCA (n = 127)

# Reload the full wide table, i.e. before the HIV/HBV exclusion
load(file = "data/bl_schisto_wide_25052024_BW.Rdata")

# Patients without any CCA result. The FibroScan columns lose their '_res'
# suffix while being selected.
SENB_noCCA <- bl_schisto_wide_25052024_BW %>%
  filter(is.na(cca_res)) %>%
  select(
    pid, enrol_d, cca_res, cca_bl_date,
    fibs_med = fibs_med_res,
    fibs_iqr = fibs_iqr_res,
    cap_med = cap_med_res,
    cap_iqr = cap_iqr_res
  )

reactable(data = SENB_noCCA)
# Look for fully duplicated rows (no columns given, so all are compared)
get_dupes(SENB_noCCA)
## No variable names specified - using all columns.
## No duplicate combinations found of: pid, enrol_d, cca_res, cca_bl_date, fibs_med, fibs_iqr, cap_med, cap_iqr
## # A tibble: 0 × 9
## # ℹ 9 variables: pid <chr>, enrol_d <date>, cca_res <fct>, cca_bl_date <date>,
## #   fibs_med <dbl>, fibs_iqr <dbl>, cap_med <dbl>, cap_iqr <dbl>,
## #   dupe_count <int>
# The full wide table is no longer needed
rm(bl_schisto_wide_25052024_BW)

List of SENB patients to be tested for CCA

# One list element per patient ID, printed as a single comma-separated line
patient_list <- as.list(SENB_noCCA[["pid"]])
cat(toString(patient_list))
## SENB1082, SENB1205, SENB1212, SENB1225, SENB1227, SENB1241, SENB1275, SENB1280, SENB1281, SENB1492, SENB1496, SENB1512, SENB1536, SENB1541, SENB1555, SENB1608, SENB1632, SENB1705, SENB2001, SENB2003, SENB2006, SENB2008, SENB2009, SENB2010, SENB2011, SENB2014, SENB2015, SENB2016, SENB2017, SENB2018, SENB2019, SENB2021, SENB2022, SENB2024, SENB2025, SENB2026, SENB2028, SENB2029, SENB2030, SENB2031, SENB2032, SENB2033, SENB2034, SENB2035, SENB2038, SENB2040, SENB2041, SENB2042, SENB2043, SENB2044, SENB2045, SENB2049, SENB2050, SENB2051, SENB2053, SENB2054, SENB2056, SENB2092, SENB2106, SENB2107, SENB2108, SENB2109, SENB2111, SENB2112, SENB2116, SENB2121, SENB2124, SENB2146, SENB2148, SENB2149, SENB2150, SENB2151, SENB2152, SENB2153, SENB2154, SENB2155, SENB2156, SENB2157, SENB2158, SENB2159, SENB2160, SENB2161, SENB2162, SENB2163, SENB2164, SENB2165, SENB2166, SENB2167, SENB2168, SENB2174, SENB2175, SENB2176, SENB2177, SENB2178, SENB2179, SENB2180, SENB2181, SENB2182, SENB2183, SENB2184, SENB2185, SENB2186, SENB2187, SENB2188, SENB2189, SENB2190, SENB2191, SENB2192, SENB2193, SENB2194, SENB2195, SENB2196, SENB2197, SENB2198, SENB2199, SENB2200, SENB2201, SENB2202, SENB2203, SENB2204, SENB2205, SENB2206, SENB2207, SENB2208, SENB2209, SENB2217, SENB2218

We want a table with the pids and also the enrolment date, so that the urine samples of the patients who had a FibroScan can be found easily in the lab.

# Load the long-format table
load(file = "data/bl_schisto_long_25052024_BW.Rdata")

# 'CCA_Fb_long': variables of interest only, with the labelled columns
# 'cca_res' and 'hiv_status' converted to plain character labels
CCA_Fb_long <- bl_schisto_long_25052024_BW %>%
  select(pid, enrol_d, test_dt, cca_res, fibs_med, fibs_iqr, cap_med, cap_iqr, hiv_status) %>%
  mutate(across(c(cca_res, hiv_status), function(x) as.character(as_factor(x))))

# 'df2': rows of patients having a FibroScan date and no CCA result.
# 'fibs_date' is the test date of a row where both 'fibs_med' and 'cap_med'
# are present; it is then propagated to the patient's other rows
# (filled downwards first, then upwards).
df2 <- CCA_Fb_long %>%
  group_by(pid) %>%
  mutate(fibs_date = case_when(!is.na(fibs_med) & !is.na(cap_med) ~ test_dt)) %>%
  fill(fibs_date, .direction = "downup") %>%
  ungroup() %>%
  filter(!is.na(fibs_date), is.na(cca_res)) %>%
  select(-test_dt)

# Join the long-table rows ('df2') to the wide-table rows ('SENB_noCCA').
# The key columns are now spelled out instead of being guessed by dplyr: they
# are exactly the columns the implicit natural join used, so the result is
# unchanged, but a column added later to either table can no longer silently
# become a join key.
# NOTE(review): four keys (fibs_med, fibs_iqr, cap_med, cap_iqr) are numeric
# measurements, so rows only match when these values are exactly equal in both
# tables - confirm this is intended.
# The former `select(-cca_bl_date)` was redundant with the final select().
patient_table <- inner_join(
  df2, SENB_noCCA,
  by = c("pid", "enrol_d", "cca_res", "fibs_med", "fibs_iqr", "cap_med", "cap_iqr")
) %>%
  select(pid, enrol_d, cca_res, hiv_status)
## Joining with `by = join_by(pid, enrol_d, cca_res, fibs_med, fibs_iqr, cap_med,
## cap_iqr)`
reactable(data = patient_table)
# Export the table as CSV (write.csv() defaults, so row names are written too)
write.csv(x = patient_table, file = "SENB_noCCA_n127.csv")
# Drop the objects that are no longer needed
rm(bl_schisto_long_25052024_BW, df2, CCA_Fb_long)

Create ‘SENBmono_noCCA’ dataset with (mono-infected) patients who have never been tested for CCA (n = 78)

# Same selection as for 'SENB_noCCA', but taken from the table restricted to
# hiv_status == "HBV" and without the enrolment date: patients with no CCA
# result, FibroScan columns renamed without their '_res' suffix.
SENBmono_noCCA <- bl_schisto_wide_25052024_noCoInf %>%
  filter(is.na(cca_res)) %>%
  select(
    pid, cca_res, cca_bl_date,
    fibs_med = fibs_med_res,
    fibs_iqr = fibs_iqr_res,
    cap_med = cap_med_res,
    cap_iqr = cap_iqr_res
  )

reactable(data = SENBmono_noCCA)
# Look for fully duplicated rows (no columns given, so all are compared)
get_dupes(SENBmono_noCCA)
## No variable names specified - using all columns.
## No duplicate combinations found of: pid, cca_res, cca_bl_date, fibs_med, fibs_iqr, cap_med, cap_iqr
## # A tibble: 0 × 8
## # ℹ 8 variables: pid <chr>, cca_res <fct>, cca_bl_date <date>, fibs_med <dbl>,
## #   fibs_iqr <dbl>, cap_med <dbl>, cap_iqr <dbl>, dupe_count <int>

SOCIO-DEMOGRAPHICS

Create temporary dataset with all socio-demographic variables

# First 23 columns of the table, with the missing origin fields filled in:
#   - a missing country of origin becomes "Senegal";
#   - a missing region or department falls back to the (completed) country.
# The three assignments run in order, so 'region_orig' and 'dpt_orig' already
# see the completed 'country_orig'. 'sen_yn' is dropped afterwards.
sociodemo1 <- bl_schisto_wide_25052024_noCoInf %>%
  select(1:23) %>%
  mutate(
    country_orig = coalesce(country_orig, "Senegal"),
    region_orig = coalesce(region_orig, country_orig),
    dpt_orig = coalesce(dpt_orig, country_orig)
  ) %>%
  select(-sen_yn)

reactable(data = sociodemo1)

Select for variables of interest only

# Variables of interest only. When 'employ' is "other", the free-text
# 'other_employ' (lower-cased) is used instead; 'other_employ' is then removed.
sociodemo2 <- sociodemo1 %>%
  select(pid, age_en, sex, country_orig, region_orig, dpt_orig, reg_act, employ, other_employ) %>%
  mutate(
    employ = case_when(
      employ == "other" ~ str_to_lower(other_employ),
      employ != "other" ~ employ
    ),
    other_employ = NULL
  )

reactable(data = sociodemo2)
rm(sociodemo1)

Analysis

# age
## median of 'age_en' (missing values ignored)
median_age <- median(sociodemo2[["age_en"]], na.rm = TRUE)

## two age groups split at 31
## NOTE(review): the "18-31" label is given to age_en < 31 only; a value of
## exactly 31 is labelled ">31" - confirm the labels are the intended ones.
age_cat <- mutate(
  sociodemo2,
  age_en = case_when(
    age_en >= 31 ~ ">31",
    age_en < 31 ~ "18-31"
  )
)
## frequency table with a total row and 2-decimal percentages
adorn_pct_formatting(
  adorn_totals(tabyl(age_cat, age_en), where = "row"),
  digits = 2
)
##  age_en   n percent
##   18-31 384  48.06%
##     >31 415  51.94%
##   Total 799 100.00%
# sex: frequency table with a total row and 2-decimal percentages
adorn_pct_formatting(
  adorn_totals(tabyl(sociodemo2, sex), where = "row"),
  digits = 2
)
##     sex   n percent
##    Male 427  53.44%
##  Female 372  46.56%
##   Total 799 100.00%
# origin
## strip leading/trailing whitespace, e.g. "Diurbel " becomes "Diurbel"
sociodemo2 <- mutate(sociodemo2, dpt_orig = trimws(dpt_orig))

## endemicity class of the place of origin. The rules are tried from top to
## bottom and the first match wins; a row matching none of them stays NA.
orig_cat <- sociodemo2 %>%
  mutate(orig_endemicity = case_when(
    region_orig %in% c("Cameroun", "Benin", "Guinea", "Guinea_Bissau",
                       "Ivory_Coast", "Mali", "Mauritania") ~ "Other Country",
    region_orig %in% c("Kédougou", "Tambacounda", "Matam", "Saint_Louis") |
      dpt_orig %in% c("Bounkiling", "Sédhiou", "Medina Yoro Foulah",
                      "Linguère", "Louga") ~ "≥50% prevalence (high)",
    region_orig %in% "Kaffrine" |
      dpt_orig %in% c("Bignognal", "Oussouye", "Kolda", "Vélingara", "Kaolack",
                      "Fatick", "Gossas", "Diurbel", "Bambey",
                      "M'bour") ~ "10-49% prevalence (moderate)",
    dpt_orig %in% c("Ziguinchor", "Nioro du Rip", "Guinguinéo", "Foundiougne",
                    "Mbacké", "Tivaouane", "Thiès", "Kébémer") ~ "1-9.9% prevalence (low)",
    region_orig %in% "Dakar" | dpt_orig %in% "Goudomp" ~ "<1% prevalence (non-endemic)"
  ))

## frequency table with a total row and 2-decimal percentages
adorn_pct_formatting(
  adorn_totals(tabyl(orig_cat, orig_endemicity), where = "row"),
  digits = 2
)
##               orig_endemicity   n percent valid_percent
##       1-9.9% prevalence (low) 183  22.90%        22.93%
##  10-49% prevalence (moderate) 245  30.66%        30.70%
##  <1% prevalence (non-endemic) 273  34.17%        34.21%
##                 Other Country  17   2.13%         2.13%
##        ≥50% prevalence (high)  80  10.01%        10.03%
##                          <NA>   1   0.13%             -
##                         Total 799 100.00%       100.00%
# reg_act: "Dakar" versus any other value ("Outside"); a missing value stays NA
reg_act_cat <- mutate(
  sociodemo2,
  reg_act = case_when(
    reg_act != "Dakar" ~ "Outside",
    reg_act == "Dakar" ~ "Dakar"
  )
)

# frequency table with a total row and 2-decimal percentages
adorn_pct_formatting(
  adorn_totals(tabyl(reg_act_cat, reg_act), where = "row"),
  digits = 2
)
##  reg_act   n percent
##    Dakar 642  80.35%
##  Outside 157  19.65%
##    Total 799 100.00%
# profession ?

# the exploratory tables built above are no longer needed
rm(age_cat, orig_cat, reg_act_cat)

Creation of final sociodemographics table

# Final socio-demographic table: applies to 'sociodemo2' the same three
# recodings explored above (age group, endemicity of the place of origin,
# Dakar vs outside) and keeps the analysis columns only.
sociodemo_final <- sociodemo2 %>%
  # age group (stored as 'age_cat' after the rename below)
  mutate(age_en = case_when(
    age_en < 31 ~ "18-31",
    age_en >= 31 ~ ">31"
  )) %>%
  # endemicity class: rules are tried in order, unmatched rows stay NA
  mutate(orig_endemicity = case_when(
    region_orig %in% c("Cameroun", "Benin", "Guinea", "Guinea_Bissau", "Ivory_Coast", "Mali", "Mauritania") ~ "Other Country",
    region_orig %in% c("Kédougou", "Tambacounda", "Matam", "Saint_Louis") ~ "≥50% prevalence (high)",
    dpt_orig %in% c("Bounkiling", "Sédhiou", "Medina Yoro Foulah", "Linguère", "Louga") ~ "≥50% prevalence (high)",
    region_orig == "Kaffrine" ~ "10-49% prevalence (moderate)",
    dpt_orig %in% c("Bignognal", "Oussouye", "Kolda", "Vélingara", "Kaolack", "Fatick", "Gossas",
                    "Diurbel", "Bambey", "M'bour") ~ "10-49% prevalence (moderate)",
    dpt_orig %in% c("Ziguinchor", "Nioro du Rip", "Guinguinéo", "Foundiougne", "Mbacké", "Tivaouane", "Thiès", "Kébémer") ~ "1-9.9% prevalence (low)",
    region_orig == "Dakar" ~ "<1% prevalence (non-endemic)",
    dpt_orig == "Goudomp" ~ "<1% prevalence (non-endemic)"
  )) %>%
  # "Dakar" versus any other value
  mutate(reg_act = case_when(reg_act == "Dakar" ~ "Dakar",
                             reg_act != "Dakar" ~ "Outside")) %>%
  rename(age_cat = age_en) %>%
  select(pid, age_cat, sex, country_orig, region_orig, dpt_orig, orig_endemicity, reg_act, employ)

reactable(sociodemo_final)
# Only 'sociodemo2' is left to remove: 'sociodemo1' was already deleted right
# after 'sociodemo2' was built, and naming it here again raised an
# "object 'sociodemo1' not found" warning.
rm(sociodemo2)
## Warning in rm(list = "sociodemo1", "sociodemo2"): object 'sociodemo1' not found

COMORBIDITIES

Create temporary dataset with all comorbidities variables

# recreate variable 'HTA' following definition : Confirmed arterial systolic pressure ≥140 mmHg and/or arterial diastolic pressure ≥90 mmHg, or any anti-hypertensive treatment before inclusion in SEN-B (source : WHO, https://www.who.int/news-room/fact-sheets/detail/hypertension, updated 16 March 2023)
# NOTE(review): the treatment rule only tests that (aHTA_start - enrol_d) is
# not missing; the sign of the difference is never checked, so a treatment
# started after enrolment also counts as "yes" - confirm against the definition.
# Rows matching no rule (including rows with missing pressures) are "no".
bl_schisto_wide_25052024_noCoInf <- mutate(
  bl_schisto_wide_25052024_noCoInf,
  HTA = case_when(
    !is.na(aHTA_start - enrol_d) ~ "yes",
    tas >= 140 ~ "yes",
    tad >= 90 ~ "yes",
    TRUE ~ "no"
  )
)

# recreate variable 'diabete' ? -> show 'glu_res' for the rows where diabete is "yes"
filter(
  select(bl_schisto_wide_25052024_noCoInf, diabete, glu_res),
  diabete == "yes"
)
## # A tibble: 22 × 2
##    diabete glu_res
##    <fct>     <dbl>
##  1 yes        1.4 
##  2 yes        1.96
##  3 yes        1.69
##  4 yes        0.7 
##  5 yes        0.75
##  6 yes        2.2 
##  7 yes        0.74
##  8 yes        2.43
##  9 yes        1.63
## 10 yes        0.86
## # ℹ 12 more rows
# create variable 'dyslipidemia' : Total cholesterol >2g/l, HDL-c <0.4 g/l in men and <0.5 g/l in women, triglycerides >1.5 g/l, or LDL-cholesterol >1 g/l (source : https://www.ncbi.nlm.nih.gov/books/NBK560891/, updated 4 March 2024)
# The HDL thresholds now follow the definition above (men < 0.4, women < 0.5);
# they were previously swapped between the two sexes.
# Rows matching no rule (including rows with missing lipid results) are "no".
bl_schisto_wide_25052024_noCoInf <- bl_schisto_wide_25052024_noCoInf %>%
  mutate(dyslipidemia = case_when(colt_res > 2 ~ "yes",
                                  trg_res > 1.5 ~ "yes",
                                  ldl_res > 1 ~ "yes",
                                  sex == "Male" & hdl_res < 0.4 ~ "yes",
                                  sex == "Female" & hdl_res < 0.5 ~ "yes",
                                  TRUE ~ "no"))

# round 'BMI' to only 1 digit
bl_schisto_wide_25052024_noCoInf$BMI <- round(bl_schisto_wide_25052024_noCoInf$BMI, 1)

# temporary table holding every comorbidity variable
como1 <- bl_schisto_wide_25052024_noCoInf %>%
  select(pid, sex, alcohol_cons, audit_tot, drugs, smoke_yn, HTA, diabete, BMI, dyslipidemia, COL, AVC, renal, cancer, liver_dis_FDR, hcc_FDR)

reactable(como1)

Analysis

# alcohol consumption: frequency table with a total row and 2-decimal percentages
adorn_pct_formatting(
  adorn_totals(tabyl(como1, alcohol_cons), where = "row"),
  digits = 2
)
##  alcohol_cons   n percent
##            no 744  93.12%
##       yes_now  34   4.26%
##      yes_past  21   2.63%
##         Total 799 100.00%
# audit-c (https://www.ncbi.nlm.nih.gov/pmc/articles/PMC10163557/)
# case_when() keeps the first matching rule, so the "Addiction" rule
# (score >= 10) has to come before the sex-specific "Misuse" rules: placed
# after them it could never match a man or a woman, because any score >= 10
# is also >= 3 and >= 4.
# Rows matching no rule (including rows with a missing score) are "Negative".
audit_cat <- como1 %>%
  mutate(audit_cat = case_when(audit_tot >= 10 ~ "Addiction",
                               sex == "Female" & audit_tot >= 3 ~ "Misuse",
                               sex == "Male" & audit_tot >= 4 ~ "Misuse",
                               TRUE ~ "Negative"))

# frequency table with a total row and 2-decimal percentages
audit_cat %>%
  tabyl(audit_cat) %>%
  adorn_totals("row") %>%
  adorn_pct_formatting(digits = 2)
##  audit_cat   n percent
##     Misuse  15   1.88%
##   Negative 784  98.12%
##      Total 799 100.00%
# drugs: frequency table with a total row and 2-decimal percentages
adorn_pct_formatting(
  adorn_totals(tabyl(como1, drugs), where = "row"),
  digits = 2
)
##     drugs   n percent
##        no 786  98.37%
##  yes_now    0   0.00%
##  yes_past  13   1.63%
##     Total 799 100.00%
# smoking: frequency table with a total row and 2-decimal percentages
adorn_pct_formatting(
  adorn_totals(tabyl(como1, smoke_yn), where = "row"),
  digits = 2
)
##  smoke_yn   n percent
##        no 710  88.86%
##   yes_now  18   2.25%
##  yes_past  71   8.89%
##     Total 799 100.00%
# HTA: frequency table with a total row and 2-decimal percentages
adorn_pct_formatting(
  adorn_totals(tabyl(como1, HTA), where = "row"),
  digits = 2
)
##    HTA   n percent
##     no 607  75.97%
##    yes 192  24.03%
##  Total 799 100.00%
# diabete: frequency table with a total row and 2-decimal percentages
adorn_pct_formatting(
  adorn_totals(tabyl(como1, diabete), where = "row"),
  digits = 2
)
##  diabete   n percent
##       no 777  97.25%
##      yes  22   2.75%
##    Total 799 100.00%
# BMI (https://www.ncbi.nlm.nih.gov/books/NBK541070/)
# Classes are tested from the lowest cut-off upwards and the first match wins,
# so each rule only needs its upper bound; a missing BMI stays NA.
BMI_cat <- mutate(
  como1,
  BMI_cat = case_when(
    BMI < 18.5 ~ "underweight",
    BMI < 25 ~ "normal weight",
    BMI < 30 ~ "overweight",
    BMI >= 30 ~ "obesity"
  )
)

# frequency table with a total row and 2-decimal percentages
adorn_pct_formatting(
  adorn_totals(tabyl(BMI_cat, BMI_cat), where = "row"),
  digits = 2
)
##        BMI_cat   n percent valid_percent
##  normal weight 459  57.45%        58.03%
##        obesity  59   7.38%         7.46%
##     overweight 156  19.52%        19.72%
##    underweight 117  14.64%        14.79%
##           <NA>   8   1.00%             -
##          Total 799 100.00%       100.00%
# dyslipidemia: frequency table with a total row and 2-decimal percentages
adorn_pct_formatting(
  adorn_totals(tabyl(como1, dyslipidemia), where = "row"),
  digits = 2
)
##  dyslipidemia   n percent
##            no 482  60.33%
##           yes 317  39.67%
##         Total 799 100.00%
# variable 'COL' (?): frequency table with a total row and 2-decimal percentages
adorn_pct_formatting(
  adorn_totals(tabyl(como1, COL), where = "row"),
  digits = 2
)
##    COL   n percent
##     no 796  99.62%
##    yes   3   0.38%
##  Total 799 100.00%
# AVC: frequency table with a total row and 2-decimal percentages
adorn_pct_formatting(
  adorn_totals(tabyl(como1, AVC), where = "row"),
  digits = 2
)
##    AVC   n percent
##     no 799 100.00%
##    yes   0   0.00%
##  Total 799 100.00%
# variable 'renal' (?): frequency table with a total row and 2-decimal percentages
adorn_pct_formatting(
  adorn_totals(tabyl(como1, renal), where = "row"),
  digits = 2
)
##  renal   n percent
##     no 799 100.00%
##    yes   0   0.00%
##  Total 799 100.00%
# Cancer: frequency table with a total row and 2-decimal percentages
adorn_pct_formatting(
  adorn_totals(tabyl(como1, cancer), where = "row"),
  digits = 2
)
##  cancer   n percent
##      no 798  99.87%
##     yes   1   0.13%
##   Total 799 100.00%
# Liver Disease ('liver_dis_FDR'): frequency table with a total row and
# 2-decimal percentages
adorn_pct_formatting(
  adorn_totals(tabyl(como1, liver_dis_FDR), where = "row"),
  digits = 2
)
##  liver_dis_FDR   n percent
##             no 784  98.12%
##            yes  15   1.88%
##          Total 799 100.00%
# Familial HCC ('hcc_FDR'): frequency table with a total row and 2-decimal
# percentages
adorn_pct_formatting(
  adorn_totals(tabyl(como1, hcc_FDR), where = "row"),
  digits = 2
)
##  hcc_FDR   n percent
##       no 698  87.36%
##      yes 101  12.64%
##    Total 799 100.00%
# remove the temporary objects created for the descriptive analysis
rm(list = "audit_cat", "BMI_cat")
# drop the variables that turned out to be of little interest in the
# descriptive analysis, and store the AUDIT and BMI categories
como2 <- como1 %>%
  select(pid, sex, alcohol_cons, audit_tot, drugs, smoke_yn, HTA, diabete, BMI, dyslipidemia, liver_dis_FDR, hcc_FDR) %>%
  # case_when() keeps the first matching rule, so "Addiction" (score >= 10) is
  # tested before the sex-specific "Misuse" rules; placed after them it could
  # never match a man or a woman. Rows matching no rule are "Negative".
  mutate(audit = case_when(audit_tot >= 10 ~ "Addiction",
                           sex == "Female" & audit_tot >= 3 ~ "Misuse",
                           sex == "Male" & audit_tot >= 4 ~ "Misuse",
                           TRUE ~ "Negative")) %>%
  # BMI classes; a missing BMI stays NA
  mutate(BMI = case_when(BMI < 18.5 ~ "underweight",
                         BMI >= 18.5 & BMI < 25 ~ "normal weight",
                         BMI >= 25 & BMI < 30 ~ "overweight",
                         BMI >= 30 ~ "obesity")) %>%
  select(pid, sex, alcohol_cons, audit, drugs, smoke_yn, HTA, diabete, BMI, dyslipidemia, liver_dis_FDR, hcc_FDR)

reactable(como2)
rm(list = "como1")

Join sociodemographics & comorbidities datasets

# One row per 'pid' of the socio-demographic table, completed with the
# comorbidity columns. 'sex' exists in both tables: the comorbidity copy is
# dropped before joining, so only the socio-demographic one remains.
sociodemo_como <- left_join(
  sociodemo_final,
  select(como2, -sex),
  by = "pid"
)
# the two source tables are no longer needed
rm(sociodemo_final, como2)

I forgot to add the “dipstick EtG” variable…

# Print a preview of the table restricted to hiv_status == "HBV"
bl_schisto_wide_25052024_noCoInf
## # A tibble: 799 × 224
##    pid      hiv_status enrol_d    center med   sen_yn country_orig region_orig
##    <chr>    <chr>      <date>     <fct>  <chr> <fct>  <fct>        <fct>      
##  1 SENB1002 HBV        2019-09-30 SMIT   1     yes    <NA>         Fatick     
##  2 SENB1003 HBV        2019-10-01 SMIT   1     yes    <NA>         Dakar      
##  3 SENB1004 HBV        2019-10-01 SMIT   1     yes    <NA>         Dakar      
##  4 SENB1005 HBV        2019-10-01 SMIT   1     yes    <NA>         Dakar      
##  5 SENB1006 HBV        2019-10-01 SMIT   1     yes    <NA>         Dakar      
##  6 SENB1007 HBV        2019-10-02 SMIT   1     yes    <NA>         Ziguinchor 
##  7 SENB1008 HBV        2019-10-02 SMIT   1     yes    <NA>         Kaolack    
##  8 SENB1009 HBV        2019-10-02 SMIT   1     yes    <NA>         Louga      
##  9 SENB1010 HBV        2019-10-02 SMIT   1     yes    <NA>         Diurbel    
## 10 SENB1011 HBV        2019-10-02 SMIT   1     no     Mali         <NA>       
## # ℹ 789 more rows
## # ℹ 216 more variables: dpt_orig <fct>, res_time <dbl>, reg_act <fct>,
## #   dpt_act <fct>, birth_d <date>, age_en <dbl>, sex <fct>, rank <dbl>,
## #   marital <fct>, union <fct>, child <dbl>, education <fct>, employ <fct>,
## #   other_employ <chr>, health_ins <fct>, hiv_yn <fct>, type_HIV <chr>,
## #   hiv_test1 <fct>, hiv_test1_date <date>, hiv_test2 <fct>,
## #   hiv_test2_date <date>, CD4_last <dbl>, CD4_last_date <date>, …
# lookup table: one 'etg_res' value per 'pid'
dipstick_etg <- select(bl_schisto_wide_25052024_noCoInf, pid, etg_res)

# add 'etg_res' to 'sociodemo_como', placing it right after 'audit'
sociodemo_como <- select(
  left_join(sociodemo_como, dipstick_etg, by = "pid"),
  pid, age_cat, sex, country_orig, region_orig, dpt_orig, orig_endemicity,
  reg_act, employ, alcohol_cons, audit, etg_res,
  drugs, smoke_yn, HTA, diabete, BMI, dyslipidemia, liver_dis_FDR, hcc_FDR
)

# the lookup table is no longer needed
rm(dipstick_etg)

We now have the full sociodemographics + comorbidities table

# Browse the combined sociodemographics + comorbidities table
reactable(data = sociodemo_como)

HBV MARKERS

Create temporary dataset with all ‘HBV markers’ variables

# HBV markers per patient:
#   - screen_d      : year of 'enrol_d'
#   - cvb_detection : 'cvb_res' class - <20, 20-2000, 2001-20000, >20000 IU/mL
#                     (source : https://doi.org/10.1016/S2468-1253(24)00040-2)
#   - qAgHBs_cat    : 'qAgHBs_res' class - <100, 100-1000, >1000
#                     (source : Ouzan, 2014)
# Every class now has an explicit condition. The former `TRUE ~` fallbacks put
# a missing 'cvb_res' into "≥20000 IU/mL" and a missing 'qAgHBs_res' into
# "100-1000 IU/mL"; missing results now stay NA. Non-missing values are
# classified exactly as before (rules are tried in order, first match wins).
hbv_markers <- bl_schisto_wide_25052024_noCoInf %>%
  select(pid, enrol_d, cvb_res, qAgHBs_res, AgHBe_res) %>%
  mutate(
    screen_d = year(enrol_d),
    cvb_detection = case_when(
      cvb_res <= 20 ~ "≤20 IU/mL",
      cvb_res <= 2000 ~ "21-2000 IU/mL",
      cvb_res < 20000 ~ "2001-20000 IU/mL",
      cvb_res >= 20000 ~ "≥20000 IU/mL"
    ),
    qAgHBs_cat = case_when(
      qAgHBs_res < 100 ~ "<100 IU/mL",
      qAgHBs_res <= 1000 ~ "100-1000 IU/mL",
      qAgHBs_res > 1000 ~ ">1000 IU/mL"
    )
  ) %>%
  select(pid, screen_d, cvb_res, cvb_detection, qAgHBs_res, qAgHBs_cat, AgHBe_res)

reactable(hbv_markers)

Analysis

# Screening date (year): frequency table with a total row and 2-decimal percentages
adorn_pct_formatting(
  adorn_totals(tabyl(hbv_markers, screen_d), where = "row"),
  digits = 2
)
##  screen_d   n percent
##      2019  82  10.26%
##      2020  59   7.38%
##      2021 409  51.19%
##      2022 243  30.41%
##      2023   6   0.75%
##     Total 799 100.00%
# HBV DNA viral load class: frequency table with a total row and 2-decimal
# percentages
adorn_pct_formatting(
  adorn_totals(tabyl(hbv_markers, cvb_detection), where = "row"),
  digits = 2
)
##     cvb_detection   n percent
##  2001-20000 IU/mL 144  18.02%
##     21-2000 IU/mL 435  54.44%
##         ≤20 IU/mL 150  18.77%
##      ≥20000 IU/mL  70   8.76%
##             Total 799 100.00%
# qAgHBs class: frequency table with a total row and 2-decimal percentages
adorn_pct_formatting(
  adorn_totals(tabyl(hbv_markers, qAgHBs_cat), where = "row"),
  digits = 2
)
##      qAgHBs_cat   n percent
##  100-1000 IU/mL 101  12.64%
##      <100 IU/mL  71   8.89%
##     >1000 IU/mL 627  78.47%
##           Total 799 100.00%
# AgHBe: frequency table with a total row and 2-decimal percentages
adorn_pct_formatting(
  adorn_totals(tabyl(hbv_markers, AgHBe_res), where = "row"),
  digits = 2
)
##  AgHBe_res   n percent
##   Positive  19   2.38%
##   Negative 780  97.62%
##      Total 799 100.00%

Join ‘hbv_markers’ with ‘sociodemo_como’

# Start the analysis table: sociodemographics + comorbidities, completed with
# the HBV marker classes. The explicit column list fixes the column order and
# leaves out 'dpt_orig' and the raw 'cvb_res' / 'qAgHBs_res' values.
SchistoHBV <- select(
  left_join(sociodemo_como, hbv_markers, by = "pid"),
  pid, screen_d, age_cat, sex, country_orig, region_orig,
  orig_endemicity, reg_act, employ, alcohol_cons, audit,
  etg_res, drugs, smoke_yn, HTA, diabete, BMI, dyslipidemia,
  liver_dis_FDR, hcc_FDR, cvb_detection,
  qAgHBs_cat, AgHBe_res
)

reactable(data = SchistoHBV)
rm(sociodemo_como, hbv_markers)

HCV/HDV MARKERS

Create temporary dataset with all ‘HCV/HDV markers’ variables

# HCV / HDV results ('AcVHC_res', 'AcVHD_res') per patient
hcv_hdv_markers <- select(bl_schisto_wide_25052024_noCoInf, pid, AcVHC_res, AcVHD_res)

reactable(data = hcv_hdv_markers)

Analysis

# HCV ('AcVHC_res'): frequency table with a total row and 2-decimal percentages
adorn_pct_formatting(
  adorn_totals(tabyl(hcv_hdv_markers, AcVHC_res), where = "row"),
  digits = 2
)
##  AcVHC_res   n percent
##   Positive   2   0.25%
##   Negative 797  99.75%
##      Total 799 100.00%
# HDV ('AcVHD_res'): frequency table with a total row and 2-decimal percentages
adorn_pct_formatting(
  adorn_totals(tabyl(hcv_hdv_markers, AcVHD_res), where = "row"),
  digits = 2
)
##  AcVHD_res   n percent valid_percent
##   Positive  10   1.25%         1.25%
##   Negative 787  98.50%        98.75%
##       <NA>   2   0.25%             -
##      Total 799 100.00%       100.00%

TDF Treatment

Create temporary dataset with all ‘TDF’ variables

# Raw distribution of 'on_TDF_bl', missing values included
tabyl(bl_schisto_wide_25052024_noCoInf, on_TDF_bl)
##  on_TDF_bl   n    percent valid_percent
##      FALSE 175 0.21902378     0.7777778
##       TRUE  50 0.06257822     0.2222222
##         NA 574 0.71839800            NA
# TDF status and duration per patient.
#   - on_TDF_bl    : "Yes" when the source value is TRUE; FALSE and missing are
#                    both recoded to "No".
#   - TDF_duration : 0 for "No"; for "Yes", TDF_start minus enrol_d as a number.
# NOTE(review): TDF_start - enrol_d is negative whenever TDF started before
# enrolment; the summary further down takes abs() of it - confirm the sign
# convention wanted in the final table.
TDF_ttt <- bl_schisto_wide_25052024_noCoInf %>%
  select(pid, enrol_d, on_TDF_bl, TDF_start) %>%
  mutate(
    on_TDF_bl = case_when(
      on_TDF_bl == "TRUE" ~ "Yes",
      on_TDF_bl == "FALSE" | is.na(on_TDF_bl) ~ "No"
    ),
    TDF_duration = case_when(
      on_TDF_bl == "Yes" ~ as.numeric(TDF_start - enrol_d),
      on_TDF_bl == "No" ~ 0
    )
  ) %>%
  select(pid, on_TDF_bl, TDF_duration)

reactable(data = TDF_ttt)

Analysis

# TDF at baseline (recoded Yes/No): frequency table with a total row and
# 2-decimal percentages
adorn_pct_formatting(
  adorn_totals(tabyl(TDF_ttt, on_TDF_bl), where = "row"),
  digits = 2
)
##  on_TDF_bl   n percent
##         No 749  93.74%
##        Yes  50   6.26%
##      Total 799 100.00%
library(broom)

# Keep only the patients recoded as being on TDF at baseline
patients_TDF_bl <- TDF_ttt %>%
  filter(on_TDF_bl == "Yes")

# Median of the absolute TDF duration, plus its 2.5th and 97.5th percentiles.
# NOTE(review): 'CI95' holds percentiles of the individual durations, not a
# confidence interval of the median - confirm how it is reported.
# The percentiles are stored with tibble::enframe() as a two-column tibble
# ('names', 'x'). This is the same layout the deprecated broom::tidy() call on
# a numeric vector produced, so `CI95[[1]]$x` keeps working, without the
# "'tidy.numeric' is deprecated" warning.
summary_TDF_duration <- patients_TDF_bl %>%
  summarise(
    median_TDF_duration = median(abs(as.numeric(TDF_duration)), na.rm = TRUE),
    CI95 = list(tibble::enframe(
      quantile(abs(as.numeric(TDF_duration)), probs = c(0.025, 0.975), na.rm = TRUE),
      name = "names",
      value = "x"
    ))
  )
## Warning: There was 1 warning in `summarise()`.
## ℹ In argument: `CI95 = list(...)`.
## Caused by warning:
## ! 'tidy.numeric' is deprecated.
## See help("Deprecated")
# Pull the three summary values out of the one-row summary table:
# the median, then the first and second rows of the 'x' column of 'CI95'
median_TDF_duration <- summary_TDF_duration[["median_TDF_duration"]]
CI95_lower <- summary_TDF_duration[["CI95"]][[1]][["x"]][1]
CI95_upper <- summary_TDF_duration[["CI95"]][[1]][["x"]][2]

# Display the median TDF duration and the two percentile bounds
median_TDF_duration
## [1] 291.5
CI95_lower
## [1] 30.35
CI95_upper
## [1] 2455.8
# Drop the intermediate objects of the TDF duration summary
rm(list = "summary_TDF_duration", "patients_TDF_bl")

Join ‘TDF_ttt’ with ‘hcv_hdv_markers’ and then with the ‘SchistoHBV’ dataset

# HCV/HDV results and TDF variables side by side, one row per 'pid'
df <- hcv_hdv_markers %>%
  left_join(TDF_ttt, by = "pid")

# Add them to the analysis table and switch the listed text columns to
# Title Case (each of them is replaced in place by its str_to_title() version)
SchistoHBV <- SchistoHBV %>%
  left_join(df, by = "pid") %>%
  mutate(across(
    c(employ, alcohol_cons, drugs, smoke_yn, HTA, diabete, BMI,
      dyslipidemia, liver_dis_FDR, hcc_FDR),
    str_to_title
  ))

reactable(data = SchistoHBV)
rm(df, TDF_ttt, hcv_hdv_markers)

LIVER MARKERS

Create temporary dataset with all liver markers

# Liver markers per patient: ALT class and elevation flag, APRI score and
# class, FibroScan stiffness (LSM) classes and CAP class.
liver_markers <- bl_schisto_wide_25052024_noCoInf %>%
  select(pid, sex, alt_res, ast_res, plt_res, fibs_med_res, cap_med_res) %>%
  # APRI = (AST / 40) * 100 / platelets, with 'plt_res' rescaled by 1000
  # NOTE(review): presumably 'plt_res' is in cells/µL and 40 IU/L is the AST
  # upper limit of normal - confirm the units.
  mutate(apri_score = ((ast_res/40)*100)/plt_res*1000) %>%
  mutate(apri_score = round(apri_score, 2)) %>%
  # recategorize variables
  mutate(
    # 'alt_elevated' needs the NUMERIC ALT value, so it is computed before
    # 'alt_res' is overwritten by its text class just below. It used to come
    # after, where it compared labels such as "≤40 IU/L" with 32 and 19.
    alt_elevated = case_when(sex == "Male" & alt_res > 32 ~ "High",
                             sex == "Female" & alt_res > 19 ~ "High",
                             TRUE ~ "Normal"),
    # NOTE(review): a missing ALT falls into the `TRUE` class "41-80 IU/L"
    alt_res = case_when(alt_res <= 40 ~ "≤40 IU/L",
                        alt_res > 80 ~ ">80 IU/L",
                        TRUE ~ "41-80 IU/L"),
    # a missing score is labelled with the text "NA"; scores from 0.6 to 1.5
    # are "Unknown"
    apri_diag = case_when(apri_score > 1.5 ~ "Fibrosis",
                          apri_score < 0.6 ~ "No Fibrosis",
                          is.na(apri_score) ~ "NA",
                          TRUE ~ "Unknown"),
    fibs_med_diag = case_when(fibs_med_res <= 7.0 ~ "F0-1 (no or mild fibrosis)",
                              fibs_med_res > 7.0 & fibs_med_res < 11.1 ~ "F2–3 (significant fibrosis)",
                              fibs_med_res >= 11.1 ~ "F4 (cirrhosis)",
                              is.na(fibs_med_res) ~ "NA"),
    # NOTE(review): for the two flags below a missing LSM falls into the
    # lower class
    fibs7 = case_when(fibs_med_res >= 7.1 ~ "≥7.1 kPa",
                      TRUE ~ "<7.0 kPa"),
    fibs11 = case_when(fibs_med_res >= 11.1 ~ "≥11.1 kPa",
                       TRUE ~ "<11.0 kPa"),
    cap_med_diag = case_when(cap_med_res >= 248 ~ "SLD",
                             is.na(cap_med_res) ~ "NA",
                             TRUE ~ "Normal")
  ) %>%
  select(pid, alt_res, alt_elevated, apri_score, apri_diag, fibs_med_res, fibs_med_diag, fibs7, fibs11, cap_med_res, cap_med_diag)

# SOURCES : Lala V, Zubair M, Minter DA. Liver Function Tests. [Updated 2023 Jul 30]. In: StatPearls [Internet]. Treasure Island (FL): StatPearls Publishing; 2024 Jan-. Available from: https://www.ncbi.nlm.nih.gov/books/NBK482489
## APRI :
## Fibrosis : Morse, C.G., et al., Transient elastography for the detection of hepatic fibrosis in HIV-monoinfected adults with elevated aminotransferases on antiretroviral therapy. Aids, 2015. 29(17): p. 2297-302.
## Steatosis : Karlas, T., et al., Individual patient data meta-analysis of controlled attenuation parameter (CAP) technology for assessing steatosis. J Hepatol, 2017. 66(5): p. 1022-1030.

reactable(liver_markers)

Analysis

# ALAT class: frequency table with a total row and 2-decimal percentages
adorn_pct_formatting(
  adorn_totals(tabyl(liver_markers, alt_res), where = "row"),
  digits = 2
)
##     alt_res   n percent
##  41-80 IU/L  38   4.76%
##    >80 IU/L  14   1.75%
##    ≤40 IU/L 747  93.49%
##       Total 799 100.00%
# APRI class: frequency table with a total row and 2-decimal percentages
adorn_pct_formatting(
  adorn_totals(tabyl(liver_markers, apri_diag), where = "row"),
  digits = 2
)
##    apri_diag   n percent
##     Fibrosis   8   1.00%
##  No Fibrosis 770  96.37%
##      Unknown  21   2.63%
##        Total 799 100.00%
# FibroScan LSM class: frequency table with a total row and 2-decimal percentages
adorn_pct_formatting(
  adorn_totals(tabyl(liver_markers, fibs_med_diag), where = "row"),
  digits = 2
)
##                fibs_med_diag   n percent
##   F0-1 (no or mild fibrosis) 706  88.36%
##  F2–3 (significant fibrosis)  62   7.76%
##               F4 (cirrhosis)  31   3.88%
##                        Total 799 100.00%
# LSM ≥ 7.1 kPa: frequency table with a total row and 2-decimal percentages
adorn_pct_formatting(
  adorn_totals(tabyl(liver_markers, fibs7), where = "row"),
  digits = 2
)
##     fibs7   n percent
##  <7.0 kPa 706  88.36%
##  ≥7.1 kPa  93  11.64%
##     Total 799 100.00%
# LSM ≥ 11.1 kPa: frequency table with a total row and 2-decimal percentages
adorn_pct_formatting(
  adorn_totals(tabyl(liver_markers, fibs11), where = "row"),
  digits = 2
)
##     fibs11   n percent
##  <11.0 kPa 768  96.12%
##  ≥11.1 kPa  31   3.88%
##      Total 799 100.00%
# FibroScan CAP class: frequency table with a total row and 2-decimal percentages
adorn_pct_formatting(
  adorn_totals(tabyl(liver_markers, cap_med_diag), where = "row"),
  digits = 2
)
##  cap_med_diag   n percent
##        Normal 731  91.49%
##           SLD  68   8.51%
##         Total 799 100.00%

I want to identify how many patients have discordant fibrosis diagnoses between ‘apri_diag’ and ‘fibs_med_diag’

# A patient is discordant when APRI says "Fibrosis" while the FibroScan class
# is F0-1, or when APRI says "No Fibrosis" while the FibroScan class is
# anything other than F0-1.
discordant_patients <- filter(
  liver_markers,
  (apri_diag == "Fibrosis" & fibs_med_diag == "F0-1 (no or mild fibrosis)") |
    (apri_diag == "No Fibrosis" & fibs_med_diag != "F0-1 (no or mild fibrosis)")
)

# Show the discordant patients (n=78)
reactable(data = discordant_patients)

Join ‘liver_markers’ with ‘SchistoHBV’ dataset

# Add the liver marker columns to the analysis table, matching rows on 'pid'
SchistoHBV <- SchistoHBV %>%
  left_join(liver_markers, by = "pid")
reactable(data = SchistoHBV)

URINE MARKERS

Create temporary dataset with all ‘urine markers’ variables needed

# Urine markers per patient.
#   - upcr_ratio : protein/creatinine ratio, 'protu_res' / 'creu_res', rounded
#                  to 2 digits (https://doi.org/10.1080/10408363.2020.1723487)
#   - upcr_diag  : "Proteinuria" above 0.2, "Normal" otherwise; a missing ratio
#                  is also labelled "Normal"
#   - urine strip variables: value >= 1 is abnormal, value < 1 is "Normal",
#                  a missing value stays NA
# NOTE(review): the 0.2 cut-off is labelled in mg/mg - confirm 'protu_res' and
# 'creu_res' are recorded in units that make the ratio mg/mg.
urine_markers <- bl_schisto_wide_25052024_noCoInf %>%
  select(pid, protu_res, creu_res, leubu_res, hembu_res, protbu_res) %>%
  mutate(
    upcr_ratio = round(protu_res / creu_res, 2),
    upcr_diag = if_else(
      upcr_ratio > 0.2,
      "Proteinuria (>0.2 mg/mg)",
      "Normal (≤0.2 mg/mg)",
      missing = "Normal (≤0.2 mg/mg)"
    ),
    leubu_res = case_when(
      leubu_res < 1 ~ "Normal",
      leubu_res >= 1 ~ "Leukocyturia"
    ),
    hembu_res = case_when(
      hembu_res < 1 ~ "Normal",
      hembu_res >= 1 ~ "Hematuria"
    ),
    protbu_res = case_when(
      protbu_res < 1 ~ "Normal",
      protbu_res >= 1 ~ "Proteinuria"
    )
  ) %>%
  select(pid, protu_res, creu_res, upcr_ratio, upcr_diag, leubu_res, hembu_res, protbu_res)

reactable(data = urine_markers)
# source :

Analysis

# UPCR diagnosis: counts and percentages, plus a total row
tabyl(urine_markers, upcr_diag) %>%
  adorn_totals(where = "row") %>%
  adorn_pct_formatting(digits = 2)
##                 upcr_diag   n percent
##       Normal (≤0.2 mg/mg)  48   6.01%
##  Proteinuria (>0.2 mg/mg) 751  93.99%
##                     Total 799 100.00%
# Dipstick leukocytes (leubu_res): counts and percentages, plus a total row
tabyl(urine_markers, leubu_res) %>%
  adorn_totals(where = "row") %>%
  adorn_pct_formatting(digits = 2)
##     leubu_res   n percent valid_percent
##  Leukocyturia 230  28.79%        28.86%
##        Normal 567  70.96%        71.14%
##          <NA>   2   0.25%             -
##         Total 799 100.00%       100.00%
# Dipstick blood (hembu_res): counts and percentages, plus a total row
tabyl(urine_markers, hembu_res) %>%
  adorn_totals(where = "row") %>%
  adorn_pct_formatting(digits = 2)
##  hembu_res   n percent valid_percent
##  Hematuria  97  12.14%        12.17%
##     Normal 700  87.61%        87.83%
##       <NA>   2   0.25%             -
##      Total 799 100.00%       100.00%
# Dipstick protein (protbu_res): counts and percentages, plus a total row
tabyl(urine_markers, protbu_res) %>%
  adorn_totals(where = "row") %>%
  adorn_pct_formatting(digits = 2)
##   protbu_res   n percent valid_percent
##       Normal 680  85.11%        85.32%
##  Proteinuria 117  14.64%        14.68%
##         <NA>   2   0.25%             -
##        Total 799 100.00%       100.00%
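  • There is a problem with the UPCR ratio, right? About 94% of patients are classified as proteinuric by UPCR, versus about 15% by urine strip; check that ‘protu_res’ and ‘creu_res’ are in the same units before dividing.
  • NAs? Two patients (n=2) have missing urine strip results (‘leubu_res’, ‘hembu_res’, ‘protbu_res’).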

Join ‘urine_markers’ with ‘SchistoHBV’

# Append the urine marker columns to the analysis dataset, matching on patient id
SchistoHBV <- SchistoHBV %>%
  left_join(urine_markers, by = "pid")

ULTRASOUND (US) DATA

Create temporary dataset with all ‘Ultrasound’ variables needed

# Prints the source dataset for inspection only; no ultrasound dataset is
# built here yet.
# NOTE(review): presumably a placeholder until the ultrasound variables are
# selected -- TODO confirm.
bl_schisto_wide_25052024_noCoInf
## # A tibble: 799 × 224
##    pid      hiv_status enrol_d    center med   sen_yn country_orig region_orig
##    <chr>    <chr>      <date>     <fct>  <chr> <fct>  <fct>        <fct>      
##  1 SENB1002 HBV        2019-09-30 SMIT   1     yes    <NA>         Fatick     
##  2 SENB1003 HBV        2019-10-01 SMIT   1     yes    <NA>         Dakar      
##  3 SENB1004 HBV        2019-10-01 SMIT   1     yes    <NA>         Dakar      
##  4 SENB1005 HBV        2019-10-01 SMIT   1     yes    <NA>         Dakar      
##  5 SENB1006 HBV        2019-10-01 SMIT   1     yes    <NA>         Dakar      
##  6 SENB1007 HBV        2019-10-02 SMIT   1     yes    <NA>         Ziguinchor 
##  7 SENB1008 HBV        2019-10-02 SMIT   1     yes    <NA>         Kaolack    
##  8 SENB1009 HBV        2019-10-02 SMIT   1     yes    <NA>         Louga      
##  9 SENB1010 HBV        2019-10-02 SMIT   1     yes    <NA>         Diurbel    
## 10 SENB1011 HBV        2019-10-02 SMIT   1     no     Mali         <NA>       
## # ℹ 789 more rows
## # ℹ 216 more variables: dpt_orig <fct>, res_time <dbl>, reg_act <fct>,
## #   dpt_act <fct>, birth_d <date>, age_en <dbl>, sex <fct>, rank <dbl>,
## #   marital <fct>, union <fct>, child <dbl>, education <fct>, employ <fct>,
## #   other_employ <chr>, health_ins <fct>, hiv_yn <fct>, type_HIV <chr>,
## #   hiv_test1 <fct>, hiv_test1_date <date>, hiv_test2 <fct>,
## #   hiv_test2_date <date>, CD4_last <dbl>, CD4_last_date <date>, …

OTHER MARKERS

Create temporary dataset with all ‘Other markers’ needed

# Temporary dataset with traditional medicine use and the inputs needed for
# eGFR (serum creatinine, age at enrolment, sex), keyed by patient id.
other_markers <- bl_schisto_wide_25052024_noCoInf %>%
  select(
    pid, trad_med, trad_med_type, plants_type, trad_other,
    cre_res, age_en, sex
  )

# Traditional medicine: collapse the yes/no answer and the type into a single
# variable. A missing answer is counted as "None".
other_markers <- other_markers %>%
  mutate(
    trad_med = case_when(
      trad_med == "no" ~ "None",
      trad_med == "yes" & trad_med_type == "plant" ~ "Plant",
      trad_med == "yes" & trad_med_type == "other" ~ "Oil/Powder",
      is.na(trad_med) ~ "None"
    )
  )

# eGFR, computed with sex-specific creatinine cut-offs and exponents.
# NOTE(review): the coefficients look like the CKD-EPI 2009 creatinine
# equation with the 0.7 / 0.9 mg/dL cut-offs written as 7 / 9, which would
# mean cre_res is in mg/L -- TODO confirm the units.
other_markers <- other_markers %>%
  mutate(
    eGFR = case_when(
      sex == "Female" & cre_res <= 7 ~ 144 * (cre_res / 7)^(-0.329) * 0.993^age_en,
      sex == "Female" & cre_res > 7 ~ 144 * (cre_res / 7)^(-1.209) * 0.993^age_en,
      sex == "Male" & cre_res <= 9 ~ 141 * (cre_res / 9)^(-0.411) * 0.993^age_en,
      sex == "Male" & cre_res > 9 ~ 141 * (cre_res / 9)^(-1.209) * 0.993^age_en,
      # NA for any other case (e.g. missing sex or creatinine)
      TRUE ~ NA_real_
    )
  )

# eGFR diagnosis
other_markers <- other_markers %>%
  mutate(
    eGFR_diag = case_when(
      # A missing eGFR must stay missing: without this branch the catch-all
      # below would classify patients with no eGFR as "15-60".
      is.na(eGFR) ~ NA_character_,
      eGFR >= 60 ~ "≥60",
      eGFR <= 15 ~ "≤15",
      TRUE ~ "15-60"
    )
  ) %>%
  select(pid, trad_med, eGFR, eGFR_diag)

Analysis

# Traditional medicine use: counts and percentages, plus a total row
tabyl(other_markers, trad_med) %>%
  adorn_totals(where = "row") %>%
  adorn_pct_formatting(digits = 2)
##    trad_med   n percent
##        None 529  66.21%
##  Oil/Powder   3   0.38%
##       Plant 267  33.42%
##       Total 799 100.00%
# eGFR category: counts and percentages, plus a total row
tabyl(other_markers, eGFR_diag) %>%
  adorn_totals(where = "row") %>%
  adorn_pct_formatting(digits = 2)
##  eGFR_diag   n percent
##      15-60  73   9.14%
##        ≥60 726  90.86%
##      Total 799 100.00%

Join ‘other_markers’ with ‘SchistoHBV’ dataset

# Append traditional medicine and eGFR columns to the analysis dataset
SchistoHBV <- SchistoHBV %>%
  left_join(other_markers, by = "pid")

ADD CCA RESULTS

# CCA result per patient, with the interval between enrolment and the
# baseline CCA test date (diff_cca = cca_bl_date - enrol_d).
cca_result <- bl_schisto_wide_25052024_noCoInf %>%
  select(pid, cca_res, cca_bl_date, enrol_d) %>%
  mutate(
    diff_cca = cca_bl_date - enrol_d,
    enrol_d = NULL  # only needed to compute the interval
  )

# Append the CCA columns to the analysis dataset, matching on patient id
SchistoHBV <- SchistoHBV %>%
  left_join(cca_result, by = "pid")

TABLE 2

# Table 2: patient characteristics by CCA result, with an overall column and
# p-values. Columns that are not displayed in the table are dropped first.
SchistoHBV %>%
  select(
    -pid, -country_orig, -region_orig, -employ, -TDF_duration, -alt_elevated,
    -apri_score, -fibs_med_res, -fibs7, -fibs11, -cap_med_res, -protu_res,
    -creu_res, -upcr_ratio, -eGFR, -cca_bl_date, -diff_cca
  ) %>%
  # Fix the display order of the categories. Any value that is not listed in
  # `levels` becomes NA and is then hidden by `missing = "no"`, so each label
  # must match the stored value exactly.
  mutate(
    age_cat = factor(age_cat, levels = c("18-31", ">31")),
    orig_endemicity = factor(
      orig_endemicity,
      levels = c(
        "<1% prevalence (non-endemic)", "1-9.9% prevalence (low)",
        "10-49% prevalence (moderate)", "≥50% prevalence (high)",
        "Other Country"
      )
    ),
    # NOTE(review): unlike drugs / smoke_yn, no "Yes_now" level is listed
    # here; any such value would silently become NA -- confirm this is intended.
    alcohol_cons = factor(alcohol_cons, levels = c("Yes_past", "No")),
    drugs = factor(drugs, levels = c("Yes_now", "Yes_past", "No")),
    smoke_yn = factor(smoke_yn, levels = c("Yes_now", "Yes_past", "No")),
    BMI = factor(
      BMI,
      levels = c("Normal Weight", "Underweight", "Overweight", "Obesity")
    ),
    cvb_detection = factor(
      cvb_detection,
      levels = c(
        "≤20 IU/mL", "21-2000 IU/mL", "2001-20000 IU/mL", "≥20000 IU/mL"
      )
    ),
    qAgHBs_cat = factor(
      qAgHBs_cat,
      levels = c("<100 IU/mL", "100-1000 IU/mL", ">1000 IU/mL")
    ),
    alt_res = factor(alt_res, levels = c("≤40 IU/L", "41-80 IU/L", ">80 IU/L")),
    cap_med_diag = factor(cap_med_diag, levels = c("SLD", "Normal")),
    # The stored label is "Normal (≤0.2 mg/mg)". The previous level
    # "Normal (<0.2 mg/mg)" matched nothing, so every normal patient became NA
    # and the table reported 100% proteinuria.
    upcr_diag = factor(
      upcr_diag,
      levels = c("Proteinuria (>0.2 mg/mg)", "Normal (≤0.2 mg/mg)")
    ),
    protbu_res = factor(protbu_res, levels = c("Proteinuria", "Normal")),
    trad_med = factor(trad_med, levels = c("Plant", "Oil/Powder", "None"))
  ) %>%
  tbl_summary(
    by = cca_res,
    missing = "no",
    statistic = list(all_categorical() ~ "{n} / {N} ({p}%)"),
    missing_text = "(Missing)",
    label = list(
      screen_d ~ "Screening Year", age_cat ~ "Age",
      orig_endemicity ~ "Origin", reg_act ~ "Residence",
      alcohol_cons ~ "Alcohol Consumption", audit ~ "AUDIT-C Score",
      etg_res ~ "Dipstick EtG", drugs ~ "Drug Consumption",
      smoke_yn ~ "Smoking", HTA ~ "Hypertension", diabete ~ "Diabetes",
      dyslipidemia ~ "Dyslipidemia",
      liver_dis_FDR ~ "Family History of Liver Disease",
      hcc_FDR ~ "Family History of HCC", cvb_detection ~ "HBV viral load",
      qAgHBs_cat ~ "qAgHBs", AgHBe_res ~ "AgHBe",
      AcVHC_res ~ "HCV Serology", AcVHD_res ~ "HDV Serology",
      on_TDF_bl ~ "On TDF at Baseline", alt_res ~ "ALAT",
      apri_diag ~ "APRI Score", fibs_med_diag ~ "FibroScan LSM",
      cap_med_diag ~ "FibroScan CAP", upcr_diag ~ "UPCR Ratio",
      leubu_res ~ "Leukocyturia (Urine Strips)",
      hembu_res ~ "Hematuria (Urine Strips)",
      protbu_res ~ "Proteinuria (Urine Strips)",
      trad_med ~ "Traditional Medicine", eGFR_diag ~ "eGFR"
    )
  ) %>%
  add_overall(last = TRUE, col_label = "**Total** (N = {N})") %>%
  add_p(pvalue_fun = ~ style_pvalue(.x, digits = 2)) %>%
  modify_header(label ~ "**Variable**") %>%
  modify_spanning_header(c("stat_1", "stat_2") ~ "**CCA Result**") %>%
  modify_caption("**Table 2. Patient Characteristics**") %>%
  bold_labels() %>%
  as_gt()
## 78 observations missing `cca_res` have been removed. To include these observations, use `forcats::fct_na_value_to_level()` on `cca_res` column before passing to `tbl_summary()`.
Table 2. Patient Characteristics
Variable CCA Result Total (N = 721)1 p-value2
Positive, N = 601 Negative, N = 6611
Screening Year


<0.001
    2019 20 / 60 (33%) 61 / 661 (9.2%) 81 / 721 (11%)
    2020 2 / 60 (3.3%) 57 / 661 (8.6%) 59 / 721 (8.2%)
    2021 36 / 60 (60%) 343 / 661 (52%) 379 / 721 (53%)
    2022 2 / 60 (3.3%) 195 / 661 (30%) 197 / 721 (27%)
    2023 0 / 60 (0%) 5 / 661 (0.8%) 5 / 721 (0.7%)
Age


0.42
    18-31 32 / 60 (53%) 317 / 661 (48%) 349 / 721 (48%)
    >31 28 / 60 (47%) 344 / 661 (52%) 372 / 721 (52%)
Sex


0.52
    Male 30 / 60 (50%) 359 / 661 (54%) 389 / 721 (54%)
    Female 30 / 60 (50%) 302 / 661 (46%) 332 / 721 (46%)
Origin


0.69
    <1% prevalence (non-endemic) 17 / 59 (29%) 230 / 661 (35%) 247 / 720 (34%)
    1-9.9% prevalence (low) 12 / 59 (20%) 150 / 661 (23%) 162 / 720 (23%)
    10-49% prevalence (moderate) 21 / 59 (36%) 204 / 661 (31%) 225 / 720 (31%)
    ≥50% prevalence (high) 8 / 59 (14%) 62 / 661 (9.4%) 70 / 720 (9.7%)
    Other Country 1 / 59 (1.7%) 15 / 661 (2.3%) 16 / 720 (2.2%)
Residence


0.22
    Dakar 52 / 60 (87%) 530 / 661 (80%) 582 / 721 (81%)
    Outside 8 / 60 (13%) 131 / 661 (20%) 139 / 721 (19%)
Alcohol Consumption


>0.99
    Yes_past 1 / 59 (1.7%) 18 / 631 (2.9%) 19 / 690 (2.8%)
    No 58 / 59 (98%) 613 / 631 (97%) 671 / 690 (97%)
AUDIT-C Score


>0.99
    Misuse 1 / 60 (1.7%) 12 / 661 (1.8%) 13 / 721 (1.8%)
    Negative 59 / 60 (98%) 649 / 661 (98%) 708 / 721 (98%)
Dipstick EtG


0.023
    Positive 5 / 57 (8.8%) 135 / 629 (21%) 140 / 686 (20%)
    Negative 52 / 57 (91%) 494 / 629 (79%) 546 / 686 (80%)
Drug Consumption


>0.99
    Yes_now 0 / 60 (0%) 0 / 661 (0%) 0 / 721 (0%)
    Yes_past 1 / 60 (1.7%) 10 / 661 (1.5%) 11 / 721 (1.5%)
    No 59 / 60 (98%) 651 / 661 (98%) 710 / 721 (98%)
Smoking


0.45
    Yes_now 0 / 60 (0%) 15 / 661 (2.3%) 15 / 721 (2.1%)
    Yes_past 7 / 60 (12%) 57 / 661 (8.6%) 64 / 721 (8.9%)
    No 53 / 60 (88%) 589 / 661 (89%) 642 / 721 (89%)
Hypertension 11 / 60 (18%) 164 / 661 (25%) 175 / 721 (24%) 0.26
Diabetes 2 / 60 (3.3%) 18 / 661 (2.7%) 20 / 721 (2.8%) 0.68
BMI


0.87
    Normal Weight 35 / 60 (58%) 378 / 655 (58%) 413 / 715 (58%)
    Underweight 10 / 60 (17%) 99 / 655 (15%) 109 / 715 (15%)
    Overweight 12 / 60 (20%) 124 / 655 (19%) 136 / 715 (19%)
    Obesity 3 / 60 (5.0%) 54 / 655 (8.2%) 57 / 715 (8.0%)
Dyslipidemia 20 / 60 (33%) 274 / 661 (41%) 294 / 721 (41%) 0.22
Family History of Liver Disease 0 / 60 (0%) 12 / 661 (1.8%) 12 / 721 (1.7%) 0.61
Family History of HCC 5 / 60 (8.3%) 86 / 661 (13%) 91 / 721 (13%) 0.30
HBV viral load


0.98
    ≤20 IU/mL 10 / 60 (17%) 123 / 661 (19%) 133 / 721 (18%)
    21-2000 IU/mL 33 / 60 (55%) 356 / 661 (54%) 389 / 721 (54%)
    2001-20000 IU/mL 11 / 60 (18%) 123 / 661 (19%) 134 / 721 (19%)
    ≥20000 IU/mL 6 / 60 (10%) 59 / 661 (8.9%) 65 / 721 (9.0%)
qAgHBs


0.35
    <100 IU/mL 8 / 60 (13%) 54 / 661 (8.2%) 62 / 721 (8.6%)
    100-1000 IU/mL 6 / 60 (10%) 85 / 661 (13%) 91 / 721 (13%)
    >1000 IU/mL 46 / 60 (77%) 522 / 661 (79%) 568 / 721 (79%)
AgHBe


0.65
    Positive 2 / 60 (3.3%) 15 / 661 (2.3%) 17 / 721 (2.4%)
    Negative 58 / 60 (97%) 646 / 661 (98%) 704 / 721 (98%)
HCV Serology


>0.99
    Positive 0 / 60 (0%) 1 / 661 (0.2%) 1 / 721 (0.1%)
    Negative 60 / 60 (100%) 660 / 661 (100%) 720 / 721 (100%)
HDV Serology


0.50
    Positive 1 / 59 (1.7%) 7 / 660 (1.1%) 8 / 719 (1.1%)
    Negative 58 / 59 (98%) 653 / 660 (99%) 711 / 719 (99%)
On TDF at Baseline 2 / 60 (3.3%) 41 / 661 (6.2%) 43 / 721 (6.0%) 0.57
ALAT


>0.99
    ≤40 IU/L 56 / 60 (93%) 616 / 661 (93%) 672 / 721 (93%)
    41-80 IU/L 3 / 60 (5.0%) 33 / 661 (5.0%) 36 / 721 (5.0%)
    >80 IU/L 1 / 60 (1.7%) 12 / 661 (1.8%) 13 / 721 (1.8%)
APRI Score


0.30
    Fibrosis 0 / 60 (0%) 8 / 661 (1.2%) 8 / 721 (1.1%)
    No Fibrosis 57 / 60 (95%) 638 / 661 (97%) 695 / 721 (96%)
    Unknown 3 / 60 (5.0%) 15 / 661 (2.3%) 18 / 721 (2.5%)
FibroScan LSM


0.14
    F0-1 (no or mild fibrosis) 56 / 60 (93%) 580 / 661 (88%) 636 / 721 (88%)
    F2–3 (significant fibrosis) 1 / 60 (1.7%) 55 / 661 (8.3%) 56 / 721 (7.8%)
    F4 (cirrhosis) 3 / 60 (5.0%) 26 / 661 (3.9%) 29 / 721 (4.0%)
FibroScan CAP


0.14
    SLD 2 / 60 (3.3%) 59 / 661 (8.9%) 61 / 721 (8.5%)
    Normal 58 / 60 (97%) 602 / 661 (91%) 660 / 721 (92%)
UPCR Ratio


>0.99
    Proteinuria (>0.2 mg/mg) 60 / 60 (100%) 618 / 618 (100%) 678 / 678 (100%)
    Normal (<0.2 mg/mg) 0 / 60 (0%) 0 / 618 (0%) 0 / 678 (0%)
Leukocyturia (Urine Strips)


0.88
    Leukocyturia 17 / 60 (28%) 193 / 659 (29%) 210 / 719 (29%)
    Normal 43 / 60 (72%) 466 / 659 (71%) 509 / 719 (71%)
Hematuria (Urine Strips)


0.67
    Hematuria 6 / 60 (10%) 78 / 659 (12%) 84 / 719 (12%)
    Normal 54 / 60 (90%) 581 / 659 (88%) 635 / 719 (88%)
Proteinuria (Urine Strips)


0.83
    Proteinuria 10 / 60 (17%) 103 / 659 (16%) 113 / 719 (16%)
    Normal 50 / 60 (83%) 556 / 659 (84%) 606 / 719 (84%)
Traditional Medicine


0.72
    Plant 18 / 60 (30%) 222 / 661 (34%) 240 / 721 (33%)
    Oil/Powder 0 / 60 (0%) 2 / 661 (0.3%) 2 / 721 (0.3%)
    None 42 / 60 (70%) 437 / 661 (66%) 479 / 721 (66%)
eGFR


0.32
    ≥60 52 / 60 (87%) 599 / 661 (91%) 651 / 721 (90%)
    15-60 8 / 60 (13%) 62 / 661 (9.4%) 70 / 721 (9.7%)
1 n / N (%)
2 Fisher’s exact test; Pearson’s Chi-squared test